---
title: 'Replication Code for Reducing Emissions and Air Pollution from Brick Manufacturing Evidence from Bangladesh'
author: "Nina Brooks"
date: "2024-12-08"
output: html_document
editor_options: 
  chunk_output_type: console
---


# Set-up
```{r setup, include=FALSE}
knitr::opts_chunk$set(
    echo = FALSE, warning = FALSE, message = FALSE, cache = FALSE,
    eval = TRUE, results = "asis", out.width = "80%", fig.align = "center"
)

rm(list = ls())
require(groundhog)
pkgs <- c("tabulator", "broom", "fixest", "ggsci", "here", "janitor", "knitr",
          "modelsummary", "kableExtra", "gtsummary", "readxl", "patchwork", 
          "openxlsx", "tidylog", "tidyverse")
groundhog.library(pkgs, "2023-09-01")

# set modelsummary options
f <- function(x) format(round(x, 3), big.mark=",") # format function for modelsummary
options(modelsummary_format_numeric_latex = "plain")

# load datasets

# Baseline
load(here("data/baseline.RData"))

# Outcome data (merged kiln performance assessment + endline)
load(here("data/endline_kpm_long.RData"))

endline_kpm_long <- endline_kpm_long %>%
    mutate(workers_cooking_fuel = case_when(
        workers_cooking_fuel == "yes" ~ 1,
        T ~ 0
    ))

```



# Main analysis

## Adoption of technical intervention

```{r adoption}
adopt_count <- endline_kpm_long %>% 
    group_by(treatment, adopter_kpm) %>% 
    count() %>%
    filter(adopter_kpm == TRUE) %>%
    ungroup() %>%
    select(-adopter_kpm)

adopt <- feols(adopter_kpm ~ treatment | blocks, 
            vcov = "hetero",
               data = endline_kpm_long)

adopt_reg <- tidy(adopt) %>%
    mutate(
        treatment = case_when(
            term == "treatmentincentive" ~ "incentive",
            T ~ "technical"),
        estimate = signif(estimate, 2),
        std.error = signif(std.error, 2),
        p.value = format(round(p.value, 4), scientific = FALSE)
        )

adoption_plot <- endline_kpm_long %>%
    group_by(treatment) %>%
    summarise(
        adopter_kpm = 100*mean(adopter_kpm),
        kilns = n()
    ) %>%
    left_join(adopt_count, by = c("treatment")) %>%
    left_join(adopt_reg, by = "treatment") %>%
    mutate(
        lab_height = c(10, 40, 40),
        adopt_lab = paste0(round(adopter_kpm, 1), "%")) %>%
    mutate(
        n = paste0(n),
        treatment = str_to_title(treatment),
        axisl = paste0(treatment, "\nN = ", kilns,  ", Adopted = ", n),
        axisl = factor(axisl, ordered = T,
                           levels = c("Control\nN = 92, Adopted = 18", 
                                      "Technical\nN = 89, Adopted = 59",
                                      "Incentive\nN = 95, Adopted = 61"),
                           labels = c("Control\nN = 92, Adopted = 18",  
                                      "Technical\nN = 89, Adopted = 59",
                                      "Technical+\nN = 95, Adopted = 61"))
    ) %>%
    ggplot(aes(x = axisl, y = adopter_kpm, fill = treatment)) +
        geom_bar(stat = "identity", width = 0.75) +
        geom_text(aes(x = axisl, y = lab_height, label = adopt_lab),
                  color = "white", fontface = "bold", size = 4,
                  nudge_y = 2) +
        labs(
            x = NULL,
            y = "Percentage (%)"
        ) +
        scale_fill_jama() +
        theme_minimal() +
        theme(
            legend.position = "none",
            panel.grid.major.x = element_blank()
        )

ggsave(plot = adoption_plot,
       filename = here("output/fig_2.pdf"),
       width = 5, height = 4)

```

## Bundled ITT & TOT results
```{r primary-outcomes}
control_means <- endline_kpm_long %>%
    filter(treatment == "control") %>%
    select(sec, co2e_annual_production, pm25_annual_production, 
                   fuel_spending_per_brick_kpm__ownprice, 
                   fuel_spending_annual_prod_kpm_bdt__ownprice,
                   total_revenue_million_bdt_endline__median) %>%
    summarise_all(mean) %>%
    pivot_longer(
        cols = everything(),
        names_to = "outcome",
        values_to = "control_mean"
    )

results_itt <- feols(c(sec, co2e_annual_production, pm25_annual_production, 
                   fuel_spending_per_brick_kpm__ownprice, 
                   fuel_spending_annual_prod_kpm_bdt__ownprice,
                   total_revenue_lakh_bdt_endline__median
                   ) ~ treatment_bundled | blocks,
                 vcov = "hetero",
                 data = endline_kpm_long) %>%
    lapply(tidy, conf.int = T) %>%
    bind_rows(.id = "outcome")

results_tot <- feols(c(sec, co2e_annual_production, pm25_annual_production, 
                   fuel_spending_per_brick_kpm__ownprice, 
                   fuel_spending_annual_prod_kpm_bdt__ownprice,
                   total_revenue_lakh_bdt_endline__median)  ~ 1 | blocks | adopter_kpm ~ treatment,
                 vcov = "hetero",
                 data = endline_kpm_long) %>%
    lapply(tidy, conf.int = T) %>%
    bind_rows(.id = "outcome")

results <- bind_rows(results_itt_df, results_iv_df) %>%
    mutate(
        model = case_when(
            term  == "treatment_bundledg" ~ "ITT",
            T ~ "IV"
        ),
        outcome = str_remove(outcome, "lhs: ")) %>%
     left_join(control_means, by = "outcome") %>%
    mutate(
        control_mean = case_when(
            outcome %in% c("co2e_annual_production","fuel_spending_annual_prod_kpm_bdt__ownprice",
                                    "total_revenue_million_bdt_endline__median")  ~ round(control_mean, 1),
            T ~ round(control_mean, 2)),
        label = factor(outcome, ordered = T,
                         levels = c("sec", "co2e_annual_production",
                                    "pm25_annual_production",
                                    "fuel_spending_per_brick_kpm__ownprice", 
                                    "fuel_spending_annual_prod_kpm_bdt__ownprice",
                                    "total_revenue_million_bdt_endline__median"),
                         labels = c(paste0("Specific Energy Consumption", "\n(MJ/kg fired brick)", "\nControl Mean: ", control_mean[1]), 
                                    paste0("CO2 Emissions", "\n(tons/kiln/season)", "\nControl Mean: ", control_mean[2]),
                                    paste0("PM2.5 Emissions", "\n(tons/kiln/season)", "\nControl Mean: ", control_mean[3]),
                                    paste0("Fuel Costs per Brick", "\n(BDT/brick)", "\nControl Mean: ", control_mean[4]),
                                    paste0("Total Fuel Costs", "\n(million BDT/kiln/season)", "\nControl Mean: ", control_mean[5]),
                                    paste0("Value of Production", "\n(million BDT/kiln/season)", "\nControl Mean: ", control_mean[6]))
                    )

        )

panel_a <- results %>%
    filter(
        outcome %in% c("sec", "co2e_annual_production", "pm25_annual_production")
    ) %>%
    ggplot(aes(x = model, y = estimate, color = model)) +
        geom_point() +
        geom_errorbar(aes(ymin = conf.low, ymax = conf.high),
                      width = 0) +
        geom_hline(aes(yintercept = 0), linetype = "dashed") +
        facet_wrap(~label, scales = "free_y"
                   ) +
        scale_color_jama() +
        theme_bw() +
        theme(
            legend.position = "none",
            panel.grid.major.x = element_blank(),
            panel.grid.minor = element_blank(),
            panel.grid.major.y = element_line(color = "white"),
            axis.text.x = element_text(face = "bold", size = 11),
            strip.text = element_text(size = 10),
            strip.background = element_blank(),
            plot.title = element_text(face = "bold", hjust = 0, vjust = 0),
            plot.margin = unit(c(30,0,20,10), "pt"),
            plot.background = element_blank(),
            panel.background = element_blank()
        ) +
        labs(
            x = NULL,
            y = "Treatment Effect"
        )

panel_b <- results %>%
    filter(
        outcome %in% c("fuel_spending_per_brick_kpm__ownprice", 
                       "fuel_spending_annual_prod_kpm_bdt__ownprice",
                       "total_revenue_million_bdt_endline__median")
    ) %>%
    ggplot(aes(x = model, y = estimate, color = model)) +
        geom_point() +
        geom_errorbar(aes(ymin = conf.low, ymax = conf.high),
                      width = 0) +
        geom_hline(aes(yintercept = 0), linetype = "dashed") +
        facet_wrap(~label, scales = "free_y"
                   ) +
        scale_color_jama() +
        theme_bw() +
        theme(
            legend.position = "none",
            panel.grid.major.x = element_blank(),
            panel.grid.minor = element_blank(),
            panel.grid.major.y = element_line(color = "white"),
            axis.text.x = element_text(face = "bold", size = 11),
            strip.text = element_text(size = 10),
            strip.background = element_blank(),
            plot.title = element_text(face = "bold", hjust = 0, vjust = 0),
            plot.margin = unit(c(30,0,10,10), "pt"),
            plot.background = element_blank(),
            panel.background = element_blank()
        ) +
        labs(
            x = NULL,
            y = "Treatment Effect"
        )

panel_a  / panel_b +
    plot_annotation(tag_levels = list(c('A. Energy and Emissions', 
                                        'B. Economic Outcomes'), '1')) & 
  theme(plot.tag = element_text(face = "bold", hjust = 0, vjust = -1),
        plot.tag.position = c(0, 1))

ggsave(filename = here("output/fig_3.pdf"),
       width = 8, height = 5)
```

## Brick Classes
```{r brick-classes}

class_results_itt <- feols(c(pct_class_1_brick, pct_class_1_5_brick,
                             pct_class_2_brick, pct_class_3_brick,
                             pct_broken_brick) ~ treatment_bundled | blocks,
                 vcov = "hetero",
                 data = endline_kpm_long) %>%
    lapply(tidy, conf.int = T) %>%
    bind_rows(.id = "outcome")

class_results_tot <- feols(c(pct_class_1_brick, pct_class_1_5_brick,
                             pct_class_2_brick, pct_class_3_brick,
                             pct_broken_brick)  ~ 1 | blocks | adopter_kpm ~ treatment,
                 vcov = "hetero",
                 data = endline_kpm_long) %>%
    lapply(tidy, conf.int = T) %>%
    bind_rows(.id = "outcome")


class_results <- bind_rows(class_results_itt, class_results_tot) %>%
    mutate(
        model = case_when(
            term  == "treatment_bundledg" ~ "ITT",
            T ~ "IV"
        ),
        class = str_remove(outcome, "lhs: "),
        class = factor(class, ordered = T,
                         levels = c("pct_class_1_brick", "pct_class_1_5_brick",
                                    "pct_class_2_brick", "pct_class_3_brick",
                                    "pct_broken_brick"),
                         labels = c("Class 1",
                                    "Class 1.5",
                                    "Class 2",
                                    "Class 3",
                                    "Broken Bricks")
        )
    )

class_results %>%
    ggplot(aes(x = class, y = estimate, color = model)) +
        geom_point(position = position_dodge(0.3), size = 1.5) +
        geom_errorbar(aes(ymin = conf.low, ymax = conf.high, color = model),
                      position = position_dodge(0.3),
                      width = 0) +
        scale_x_discrete(labels = function(x) str_wrap(x, width = 14)) +
        scale_y_continuous(breaks = scales::pretty_breaks(5)) +
        geom_hline(yintercept = 0, linetype = "dashed") +
        scale_color_jama() +
        theme_minimal() +
        theme(
            panel.grid.major.x = element_blank(),
            panel.grid.minor = element_blank(),
            axis.text.x = element_text(face = "bold", size = 9),
            legend.text =  element_text(size = 8),
            legend.position = "bottom"
        ) +
        labs(
            x = NULL,
            y = "Treatment Effect",
            color = NULL
        )

ggsave(filename = here("output/fig_4.pdf"),
       width = 6, height = 4)
```

## Worker Results
```{r worker-results}

results_worker <- feols(c(any_worker_any_benefit, firing_team_extra_benefits__any_benefit,
                          loading_team_extra_benefits__any_benefit, any_worker_meals,
                          workers_cooking_fuel, any_worker_shed) ~ treatment | blocks,
                 vcov = "hetero",
                 data = endline_kpm_long) %>%
    lapply(tidy, conf.int = T) %>%
    bind_rows(.id = "outcome")


results_worker %>%
    mutate(
        model = case_when(
            term  == "treatmentincentive" ~ "Technical+",
            T ~ "Technical"
        ),
        outcome = str_remove(outcome, "lhs: "),
        outcome = factor(outcome, ordered = T,
                         levels = c("any_worker_any_benefit", "firing_team_extra_benefits__any_benefit", 
                                    "loading_team_extra_benefits__any_benefit", 
                                    "any_worker_meals", "workers_cooking_fuel", 
                                    "any_worker_shed"),
                         labels = c("Any benefits: any worker",
                                    "Any benefits: firing team",
                                    "Any benefits: loading team",
                                    "Any meals provided",
                                    "Cooking fuel provided",
                                    "Rest shed provided")
                         )
    ) %>% 
    ggplot(aes(x = model, y = estimate, color = model)) +
        geom_point() +
        geom_errorbar(aes(ymin = conf.low, ymax = conf.high),
                      width = 0) +
        geom_hline(aes(yintercept = 0), linetype = "dashed") +
        facet_wrap(~outcome, scales = "free_y") +
        scale_color_jama() +
        theme_bw() +
        theme(
            legend.position = "none",
            panel.grid.major.x = element_blank(),
            panel.grid.minor = element_blank(),
            axis.text.x = element_text(face = "bold", size = 11),
            strip.text = element_text(size = 10),
            strip.background = element_blank()
            #panel.border = element_rect(color = "black")
        ) +
        labs(
            x = NULL,
            y = "Treatment Effect"
        )

ggsave(filename = here("output/fig_5.pdf"),
       width = 7, height = 3.5)

```

## Costs and Benefits of CO2 reduction
```{r costs and benefits}
scc <- 185
staff_training <- 15384
kiln_training <- 31223
tech_support <- 42767

total_costs <- staff_training + kiln_training + tech_support

int_kilns_n <- sum(endline_kpm_long$treatment_bundled == "g")
cost_per_kiln <- total_costs/int_kilns_n

results_itt <- feols(c(sec, co2e_annual_production, pm25_annual_production, 
                   fuel_spending_per_brick_kpm__ownprice, 
                   fuel_spending_annual_prod_kpm_bdt__ownprice,
                   total_revenue_lakh_bdt_endline__median
                   ) ~ treatment_bundled | blocks,
                 vcov = "hetero",
                 data = endline_kpm_long) %>%
    lapply(tidy, conf.int = T) %>%
    bind_rows(.id = "outcome")

results_tot <- feols(c(sec, co2e_annual_production, pm25_annual_production, 
                   fuel_spending_per_brick_kpm__ownprice, 
                   fuel_spending_annual_prod_kpm_bdt__ownprice,
                   total_revenue_lakh_bdt_endline__median)  ~ 1 | blocks | adopter_kpm ~ treatment,
                 vcov = "hetero",
                 data = endline_kpm_long) %>%
    lapply(tidy, conf.int = T) %>%
    bind_rows(.id = "outcome")

bcr_df <- data.frame(
    "Scenario" = c("Base: ITT effect", "Conservative: ITT lower bound", "Optimistic: IV effect"),
    "Estimate" = c(results_itt$estimate[results_itt$outcome == "lhs: co2e_annual_production"],
                   results_itt$conf.high[results_itt$outcome == "lhs: co2e_annual_production"],
                   results_tot$estimate[results_tot$outcome == "lhs: co2e_annual_production"])
    ) %>%
    mutate(
        Estimate = round(Estimate*-1, 1),
        "Benefit per kiln (Estimate*SCC)" = round(Estimate*scc, 2),
        "BCR (Benefit per Kiln/$2022 Cost per Kiln)" = round(`Benefit per kiln (Estimate*SCC)`/cost_per_kiln, 2),
        "Cost per tCO2 ($2022 Cost per kiln/CO2 reduction per kiln)" = round(cost_per_kiln/Estimate, 2)
    )


# Create a new workbook
wb <- createWorkbook()

# Add a worksheet
addWorksheet(wb, "Cost and benefit analysis")

writeData(wb, "Cost and benefit analysis", "Panel A: Costs and benefits of kiln efficiency intervention", startRow = 1, startCol = 1)
writeData(wb, "Cost and benefit analysis", bcr_df, startCol = 1, startRow = 2)

# Format the headers (optional)
addStyle(wb, "Cost and benefit analysis", createStyle(textDecoration = "bold"), 
         rows = c(1, nrow(bcr_df) + 4), cols = 1, gridExpand = TRUE)


# Save the Excel file
saveWorkbook(wb, here("output/Table 1 - Panel A.xlsx"), overwrite = TRUE)


# to replicate panel B, import and inflation adjust to 2023 the data from Table 1: K. Gillingham, J. H. Stock, The Cost of Reducing Greenhouse Gas Emissions. Journal of Economic Perspectives 32, 53–72 (2018).


```

# Supplementary Analysis

## Balance Tests
```{r balance-functions}

#' Run the balance tests comparing a dependent variable over a split variable,
#' conditioning on blocks
#' 
#' @param depvar Dependent variable (LHS) to consider for balance tests
#' @param df Dataframe to run analysis from
#' @param split_var Variable to condition on (RHS) for balance
#'  Defaults to "treatment"
#' 
#' @return Dataframe with balance test results from feols, treatment differences
balance_test <- function(depvar, df, split_var = "treatment") {
    # browser()
    # print(depvar)
    # print(df)
    # Run balance test
    form <- as.formula(paste0(depvar, " ~ ", split_var, " | blocks"))
    reg <- fixest::feols(form, vcov = "hetero", data = df)
    tidy_reg <- broom::tidy(reg)
    # Add dependent variable for row name
    tidy_reg$outcome <- depvar
    # test equality of arms (for treatment split)
    if (split_var == "treatment") {
        test_z <- car::linearHypothesis(
            reg, "treatmenttechnical=treatmentincentive"
        )
        tidy_reg$treat_diff <- test_z$`Pr(>Chisq)`
    }
    tidy_reg
}

#' Prettify balance test results from printing
#' 
#' @param balance_var Balance variable (same as depvar) to format
#' @param means_sds Matrix with mean and standard deviations for depvars
#' @param tests Dataframe with balance test results
#' 
#' @return List with pretty, formatted results for a given variable
format_results <- function(balance_var, means_sds, tests) {
    # browser()
    # Get p-values for differences
    ic_p <- tests %>%
        filter(outcome == balance_var, term == "incentive") %>%
        dplyr::pull(`p.value`) %>%
        #round(digits = 2) %>%
        format(nsmall = 1, digits = 2, justify = "none")
    tc_p <- tests %>%
        filter(outcome == balance_var, term == "technical") %>%
        dplyr::pull(`p.value`) %>%
        #round(digits = 3) %>%
        format(nsmall = 1, digits = 2, justify = "none")
    it_p <- tests %>%
        filter(outcome == balance_var, term == "technical") %>%
        dplyr::pull(treat_diff) %>%
        #round(digits = 3) %>%
        format(nsmall = 1, digits = 2, justify = "none")
    # Get means and SDs
    if (balance_var == "kiln_zigzag_year"){
        relevant_col <- means_sds[, balance_var] %>%
        lapply(round, digits = 0) %>%
        lapply(format, nsmall = 0, 
               justify = "none") %>%
        lapply(trimws)
        
    } else if (balance_var == "production_cost_estimate_bdt_per_1k_bricks"){
        relevant_col <- means_sds[, balance_var] %>%
        #lapply(round, digits = 3) %>%
        lapply(format, nsmall = 1, digits = 1, big.mark = ",",
               justify = "none") %>%
        lapply(trimws)
        
    } else {
    relevant_col <- means_sds[, balance_var] %>%
        #lapply(round, digits = 3) %>%
        lapply(format, nsmall = 1, digits = 2, big.mark = ",",
               justify = "none") %>%
        lapply(trimws)
    }
    # Return row as output
    list("Balance Variable" = var_dictionary[balance_var],
         "Technical+ Mean" = relevant_col[["incentive"]][1],
         "Technical+ Std. Dev." = relevant_col[["incentive"]][2],
         "Technical Mean" = relevant_col[["technical"]][1],
         "Technical Std. Dev." = relevant_col[["technical"]][2],
         "Control Mean" = relevant_col[["control"]][1],
         "Control Std. Dev." = relevant_col[["control"]][2],
         "T+ - C (p-val)" = ic_p,
         "T - C (p-val)" = tc_p,
         "T+ - T (p-val)" = it_p)
}

#' Get the mean and standard deviation as a length-2 vector
#' 
#' @param x Vector to calculate mean and SD on
#' @param index Vector to condition on/group by
#' 
#' @return Matrix of means and SDs for each value in index
get_mean_sd <- function(x, index) {
    # Get the mean and standard deviation for each treatment group
    tapply(
        x,
        INDEX = index,
        FUN = function(x) {
            c(mean(x, na.rm = TRUE), sd(x, na.rm = TRUE))
        }
    )
}

#' Apply tests across various variables (balance tests or tests of attrition)
#' 
#' @param dat Dataframe to run tests on
#' @param vars Vector of variables to test for balance/attrition
#' @param test_type Type of test to consider (balance or attrition)
#' 
#' @return DataFrame with results for balance or attrition tests
apply_tests <- function(dat, vars, test_type = "balance") {
   # browser()
    balance_results <- lapply(
        vars, balance_test, df = dat, split_var = "treatment"
    ) %>%
        # Combine all results into a single table
        dplyr::bind_rows() %>%
        dplyr::relocate(outcome, .before = term) %>%
        # Clean up treatment variable
        dplyr::mutate_at("term", stringr::str_remove, pattern = "treatment")
    means_sds <- sapply(
        dat[vars], get_mean_sd, index = dat$treatment
    )
    # Format results for printing
    results <- vars %>%
        sapply(
            format_results,
            means_sds = means_sds,
            tests = balance_results
        ) %>%
        t()
    # For balance tests, also add count for each treatment group
    # Skip this for tests of attrition
    if (test_type == "balance") {
        results <- rbind(results, rep("", ncol(results)))
        for (trt in c("Control", "Technical+", "Technical")) {
            results[
                nrow(results), paste0(stringr::str_to_title(trt), " Mean")
            ] <- paste0("N = ", sum(dat$treatment_labeled == trt))
        }
    }
    results
}

#' Print results for LaTeX compilation
#' 
#' @param result Dataframe of balance test results, output of apply_tests
#' @param col_names Column names for LaTeX printing
#'  Passed to col.names argument in knitr::kable
#' @param alignment String of letters for how to justify columns
#'  Passed to align argument in knitr::kable
#' @param cap Caption (title) for the LaTeX table
#'  Passed to caption argument in knitr::kable
#' @param lab Label for LaTex table
#'  Passed to label argument in knitr::kable
#' 
#' @return Printing knitr::kable output (for display in knitted PDF)
get_result_tab <- function(result, col_names, alignment, cap, lab, type) {
   # browser()
    # Format results to a pretty table output
    out <- result %>%
        knitr::kable(
            #digits = 2,
            format = "latex",
            row.names = FALSE,
            col.names = col_names,
            align = alignment,
            caption = cap,
            label = lab,
            booktabs = TRUE,
            escape = FALSE,
            linesep = "") %>%
        kableExtra::kable_styling(
            full_width = FALSE,
            font_size = 9,
            latex_options = c("HOLD_position")
        ) %>%
        column_spec(1, width="9em") %>%
        column_spec(2:10, width="4em") %>%
    save_kable(
       out,
       file = here(
            "output/supplement", paste0(type, "_", lab, ".tex")
        )
   )
        
}

# This stores the actual depvars for the tests
balance_vars <- c(
    "kiln_owner_experience_years",
    "kiln_zigzag_year",
    "kiln_water_adjacent",
    "kiln_bricks_fired_lakhs",
    "kiln_circuits_completed",
    "class_1_production_share_percent",
    "production_cost_estimate_bdt_per_1k_bricks",
    "fired_brick_weight_kg",
    "total_workers",
    "higher_sec_plus",
    "highland",
    "joint_ownership"
)

# Dictionary for "pretty" names
var_dictionary <- c(
   "kiln_owner_experience_years" = "Owner Experience (Years)",
   "kiln_zigzag_year" = "Zigzag Year",
    "kiln_water_adjacent" = "Water Adjacent",
    "kiln_bricks_fired_lakhs" = "Bricks Fired (Lakhs)",
    "kiln_circuits_completed" = "Circuits Completed",
    "class_1_production_share_percent" = "Class 1 Production Share (\\%)",
    "production_cost_estimate_bdt_per_1k_bricks" =
        "Production Cost Estimate BDT (per 1K Bricks)",
    "fired_brick_weight_kg" = "Fired Brick Weight (kg)",
    "total_workers" = "Total Workers",
    "higher_sec_plus" = "Higher Secondary+",
    "highland" = "Highland",
    "joint_ownership" = "Joint Ownership",
    "all_ineligible" = "Not Operated or Exclusive Firewood Use",
    "all_drop_out" = "Dropped out (all reasons)",
    "over_50_out_of_range" = "Over 50\\% Out of Range",
    "pct_time_feeding_below_33" = "\\% Time Feeding below 33\\%",
    "over_50_and_below_33" =
        "Over 50\\% Out of Range AND \\% Time Feeding below 33\\%"
)



baseline <- baseline %>%
    mutate(treatment_labeled = case_when(
        treatment == "control" ~ "Control",
        treatment == "technical" ~ "Technical",
        T ~ "Technical+"
    ))



```

```{r balance-tests}
# Consider all different subsets for balance tests
datasets <- list()
datasets[["full"]] <- baseline
datasets[["eligible"]] <- baseline %>% filter(!all_ineligible)
datasets[["analytic"]] <- baseline %>% filter(!all_drop_out)
datasets[["under_50_out_of_range"]] <- baseline %>%
    filter(!over_50_out_of_range)
datasets[["above_33_pct_fuel_feeding"]] <- baseline %>%
    filter(!pct_time_feeding_below_33)
datasets[[
    "under_50_out_of_range_or_above_33_pct_fuel_feeding"
]] <- baseline %>% filter(!over_50_and_below_33)

output_tables <- list()
for (dataset_name in names(datasets)) {
    dataset <- datasets[[dataset_name]]
    data_balance_vars <- balance_vars
    output_tables[[dataset_name]] <- apply_tests(
        dataset, vars = data_balance_vars, test_type = "balance"
    )
}

for (tab_name in names(output_tables)) {
    results_tab <- output_tables[[tab_name]]
    # Calculate the number of observations in each subsample
    n_obs <- nrow(datasets[[tab_name]])
    # Prettify table caption (remove _, replace with " ")
    tab_cap <- paste0(
        tab_name %>%
            stringr::str_replace_all("_", " ") %>%
            stringr::str_to_title(),
        " Sample Results (N = ",
        n_obs,
        ")"
    )
    
    get_result_tab(
        results_tab,
        col_names = colnames(results_tab),
       # col_names = kableExtra::linebreak(colnames(results_tab), align = "c"),
        alignment = "lcccccccccc",
        cap = tab_cap,
        lab = tab_name,
        type = "balance"
    )
}


```

```{r attrition-tables}
attrition_vars <- c(
    "all_ineligible",
    "all_drop_out"
)

results_table <- apply_tests(
    baseline, vars = attrition_vars, test_type = "attrition"
)

get_result_tab(
    results_table,
    col_names = colnames(results_table),
    alignment = "lcccccc",
    cap = "Attrition Tests",
    lab = "attrition",
    type = "attrition"
)

```


## Adoption by Component

```{r adoption-component}

# set parameters
coefnames <- c(
    "fit_adopter_kpmTRUE" = "Adopted Intervention",
    "treatment_bundledg" = "Bundled Treatment",
    "treatmenttechnical" = "Technical Arm",
    "treatmentincentive" = "Technical+ Arm"
)

formula_stubs <- c(
    "Intention-to-Treat (Separate)" = "~ treatment | blocks",
    "Intention-to-Treat (Bundled)" = "~ treatment_bundled | blocks",
    "Instrumental Variable" = "~ 1 | blocks | adopter_kpm ~ treatment"
)


results_itt <- feols(c(adopter_kpm, change_pattern, single_fireman, ash_layer_9_in, 
                   cavity_wall, sawdust_biomass_front, 
                   n_practices_adopted) ~ treatment | blocks,
                 vcov = "hetero",
                 data = endline_kpm_long) 

names(results_itt) <- c("Adopter", "Stacking",
                        "Feeding", "Ash Layer",
                        "Cavity Wall", "Sawdust",
                        "Total Practices")
gm <- list(
        list("raw" = "nobs", "clean" = "N", "fmt" = "%.0f"))
    
modelsummary(results_itt,
             fmt = fmt_sprintf(paste0("%.2f")),
             gof_map = gm, statistic = "std.error",
             stars = TRUE,
             coef_map = coefnames,
             label = knitr::opts_current$get('label') %>%
                    str_replace_all("_", "-"),
              title = "Adoption By Intervention Component",
             #notes = "Heteroskedasticity-robust standard errors are in parentheses. Regressions include randomization strata fixed effects.",
                escape = TRUE, 
                output = "latex"
             ) %>%
    kableExtra::kable_styling(
        font_size = 11,
        latex_options = c("HOLD_position")) %>%
    footnote(
        general = "Heteroskedasticity-robust standard errors are in parentheses. Regression includes randomization strata fixed effects. Adoption (column 1) is defined as adopting both the improved stacking (column 2) and improved coal feeding (column 3) practices.",
threeparttable = TRUE, escape = FALSE, footnote_as_chunk = TRUE) %>%
    save_kable(
        file = here("output/supplement/adoption_regression.tex")
    )



```


## Arm-specific Results Tables
```{r functions, results='asis'}

#' Wrapper to easily run a feols fixed-effect regression
#' This is a helper function used in dep_var_tests
#' 
#' @param formula_stub RHS of the formula (what to regress on)
#' @param dep_var Dependent variable to consider (LHS of formula)
#' @param df Dataframe with data to run analysis on
#' @param vcov Type of standard error to use
#' 
#' @return feols object
#'  Output of feols regression/test to analyze impact of treatment/adoption
run_test <- function(formula_stub, dep_var, df, vcov) {
    feols(as.formula(paste(dep_var, formula_stub)), vcov = vcov, data = df)
}

#' Get the mean and standard deviation for a control group given a specific test
#' This is a helper function used in dep_var_tests
#' 
#' @param feols_test feols test output (for example, the output of run_test)
#' @param n_digits How many digits to round to (default: 4)
#' 
#' @return Vector of length two
#'  The first element is the control mean
#'  The second is the control standard deviation in parentheses
get_stat <- function(feols_test, n_digits = 2) {
    # The mean of all blocks is the same as the printed mean from summary
    block_coefs <- fixef(feols_test)$blocks
    # Round means + SDs, place SDs in parentheses
    mean_sd <- c(mean(block_coefs), sd(block_coefs)) %>%
        round(n_digits) %>%
        format(scientific = FALSE, nsmall = n_digits) %>%
        paste0(c("", "("), ., c("", ")"))
    names(mean_sd) <- c("mean", "sd")
    mean_sd
}

#' Run fixed effect tests on a dependent variable for certain groups and blocks
#'
#' @param var_name Pretty name for dependent variable (in names(dep_vars))
#' @param dep_vars Vector of dependent variables to search from
#'  Names of the vector will be "pretty" names (one of which is var_name)
#'  dep_vars[var_name] returns a variable in the dataframe
#' @param formula_stubs Formula ending for each test type
#'  formula_stubs <- c(
#'      "Intention-to-Treat\n (Separate)" = "~ treatment | blocks",
#'      "Intention-to-Treat\n (Bundled)" = "~ treatment_bundled | blocks"
#'      "Instrumental Variable" = "~ 1 | blocks | adopter ~ treatment"
#'  )
#' @param groups Groups to consider for each type of test
#'  Names of the splitting column in the dataframe output from feols
#'  groups <- c(
#'      "fit_adopterTRUE" = "Adopted Intervention",
#'      "treatment_bundledg" = "Bundled Treatment",
#'      "treatmenttechnical" = "Technical Group",
#'      "treatmentincentive" = "Technical+ Group"
#'  )
#' @param df Dataframe to analyze/get data from
#' @param vcov Type of standard error to use
#'  Defaults to "cluster", which is the same as if nothing was specified
#' @param hetero Whether we have a heterogeneous model (default FALSE)
#' @param hetero_var Variable with heterogeneous/mixed variable.
#'  Still need to specify appropriate formula stub
#' @param n_digits The number of digits to round to
#' @param filter_type Filter to consider
#'  filter_type is appended to the name of the output .tex and .pdf files
#' @param notes List of notes to add to modelsummary() functionality
#' 
#' @return Dataframe with output of fixed effect tests for a given var_name
#'  .tex table to output/tex
#'  .pdf of confidence intervals to output/figures
dep_var_tests <- function(var_name, dep_vars, formula_stubs, groups, df,
                          vcov = "hetero", hetero = FALSE, hetero_var = "",
                          n_digits = 4, filter_type = "", arm_iv = TRUE,
                          notes = list()) {
    #browser()
    dep_var <- dep_vars[var_name]
    
    # Run models
    if (arm_iv){
      all_mods <-  formula_stubs %>%
        lapply(run_test, dep_var = dep_var, df = df, vcov = vcov)
      
      incentive <- formula_stubs %>%
        lapply(run_test, dep_var = dep_var, 
               df = df %>% filter(treatment != "technical"), vcov = vcov)
      
       technical <- formula_stubs %>%
        lapply(run_test, dep_var = dep_var, 
               df = df %>% filter(treatment != "incentive"), vcov = vcov)
      
       all_mods <- c(all_mods, list(technical$`Instrumental Variable`),
                       list(incentive$`Instrumental Variable`))
       names(all_mods) <- c("itt", "itt_bundled", "iv", "iv_technical", "iv_technical+")
       
    } else if (hetero) {
      all_mods <-  formula_stubs %>%
        lapply(run_test, dep_var = dep_var, df = df, vcov = vcov)
      names(all_mods) <-  c("itt", "itt_bundled")
    } else{
      all_mods <-  formula_stubs %>%
        lapply(run_test, dep_var = dep_var, df = df, vcov = vcov)
      names(all_mods) <-  c("itt", "itt_bundled", "iv")
    }
  
    
    prelim_calculations <- try(
        all_mods %>%
            modelsummary(output = "modelsummary_list"),
        silent = TRUE
    )
    # If we get an error when running the test, then we have
    # a constant dep_var. As such, we cannot run the fixed-effect tests
    if (any(class(prelim_calculations) == "try-error")) {
        return(NULL)
    }
    
    # browser()
    # Get means, standard deviations for each test type
    control_mean <- df %>% 
        filter(treatment == "control") %>% 
        select(all_of(dep_var)) %>% 
        summarise_all(mean) %>% 
        rename_all(~"mean")
    
    # if (hetero) {
    #     colnames(control_mean) <- c("itt", "itt_bundled")
    # } else if (arm_iv){
    #     colnames(control_mean) <- c("itt", "itt_bundled", "iv", "iv_technical", "iv_technical+")
    # } else {
    #     colnames(control_mean) <- c("itt", "itt_bundled", "iv")
    # }
    # means <- control_mean["mean", ]
    # sds <- control_mean["sd", ]
    pct_changes <- list()
    # Get calculations for pct changes and control means
    # Extract different means for each test type
    for (test in names(all_mods)) {
        if (test == "itt") {
            #ctrl_mean <- as.numeric(means["itt"])
            ctrl_mean <- as.numeric(control_mean$mean)
            pct_changes[["technical+"]] <- (
                prelim_calculations[[test]]$tidy$estimate[1] / ctrl_mean * 100
            )
            pct_changes[["technical"]] <- (
                prelim_calculations[[test]]$tidy$estimate[2] / ctrl_mean * 100
            )
        } else if (test == "itt_bundled") {
            ctrl_mean <- as.numeric(control_mean$mean)
            #ctrl_mean <- as.numeric(means["itt_bundled"])
            pct_changes[[test]] <- (
                prelim_calculations[[test]]$tidy$estimate[1] / ctrl_mean * 100
            )
        } else if (test == "iv") {
            ctrl_mean <- as.numeric(control_mean$mean)
            #ctrl_mean <- as.numeric(means["iv"])
            pct_changes[[test]] <- (
                prelim_calculations[[test]]$tidy$estimate / ctrl_mean * 100
            )
        }else if (test == "iv_technical") {
            ctrl_mean <- as.numeric(control_mean$mean)
            #ctrl_mean <- as.numeric(means["iv_technical"])
            pct_changes[[test]] <- (
                prelim_calculations[[test]]$tidy$estimate / ctrl_mean * 100
            )
        }else if (test == "iv_technical+") {
            ctrl_mean <- as.numeric(control_mean$mean)
           #ctrl_mean <- as.numeric(means["iv_technical+"])
            pct_changes[[test]] <- (
                prelim_calculations[[test]]$tidy$estimate / ctrl_mean * 100
            )
        }
        
    }
    # browser()
    # Format percent changes for printing
    pct_changes <- unlist(pct_changes, recursive = TRUE)
    grps <- names(pct_changes)
    pct_changes <- pct_changes %>%
        # The lines below determines how many digits to round to
        # Adjust the number of digits in round() and format() -- nsmall
        # Can manually edit this if necessary
        round(1) %>%
        format(scientific = FALSE, nsmall = 1) %>%
        paste0("\\%")
   
    ctrl_mean <- as.character(round(control_mean$mean, 2))
    names(pct_changes) <- grps
    # Add additional rows to modelsummary output
    # Control mean/SD, percent changes
    if (arm_iv){
        rows <- tibble::tribble(
            ~term, ~`Intention-to-Treat (Separate)`,
            ~`Intention-to-Treat (Bundled)`, ~`Instrumental Variable`,
            ~`Instrumental Variable (Technical)`,~`Instrumental Variable (Technical+)`,
            # Control Means
            "Control Mean", ctrl_mean, ctrl_mean, ctrl_mean, 
           ctrl_mean, ctrl_mean,
            
            # Line below (line 185) has the SDs for control
            # Can comment out if you don't want it
            # "", sds["itt"], sds["itt_bundled"], sds["iv"],
            
            # % Change relative to control mean
            "Percent Change",
            paste(pct_changes["technical"], "(T)"),
            pct_changes["itt_bundled"],
            pct_changes["iv"], pct_changes["iv_technical"],pct_changes["iv_technical+"],
            
            "", paste(pct_changes["technical+"], "(T+)"), "", "", "", ""
        )
    } else {
        rows <- tibble::tribble(
            ~term, ~`Intention-to-Treat (Separate)`,
            ~`Intention-to-Treat (Bundled)`, ~`Instrumental Variable`,
            "Control Mean", ctrl_mean, ctrl_mean, ctrl_mean,
            # Line below (line 185) has the SDs for control
            # Can comment out if you don't want it
            # "", sds["itt"], sds["itt_bundled"], sds["iv"],
            "Percent Change",
            paste(pct_changes["technical"], "(T)"),
            pct_changes["itt_bundled"],
            pct_changes["iv"],
            "", paste(pct_changes["technical+"], "(T+)"), "", ""
        )
    }
    # Printing tables
    # nobs includes observation size for groupings
    #browser()
    gm <- list(
        list("raw" = "nobs", "clean" = "N", "fmt" = f))
    
    # output_table <- formula_stubs %>%
    #     lapply(run_test, dep_var = dep_var, vcov = "hetero", df = df)
    if (hetero) {
        rows <- rows %>% select(-`Instrumental Variable`)
        attr(rows, "position") <- seq(from = 15, to = 18)
        output_table <- all_mods %>%
            modelsummary(
                fmt = fmt_sprintf(paste0("%.", n_digits, "f")),
                gof_map = gm, statistic = "std.error",
                add_rows = rows, stars = TRUE, coef_map = groups, 
                caption = var_name, 
                label = knitr::opts_current$get('label') %>%
                    str_replace_all("_", "-"),
                escape = TRUE, 
                output = "latex"
            )
    } else {
        model_names <- names(rows)[-1]
        names(all_mods) <- model_names
        attr(rows, "position") <- seq(from = 9, to = 12)
        output_table <- all_mods %>%
            modelsummary(
                fmt = fmt_sprintf(paste0("%.", n_digits, "f")),
                gof_map = gm, statistic = "std.error",
                coef_map = groups, add_rows = rows, stars = TRUE,
                caption = var_name, 
                label = knitr::opts_current$get('label') %>%
                    str_replace_all("_", "-"),
                escape = TRUE, 
                output = "latex"
            )
    }
    if (arm_iv){
        output_table <- output_table %>%
            kableExtra::kable_styling(
                latex_options = c("HOLD_position", "scale_down")
            ) %>%
            column_spec(column = 2:6, width = "1.25in") %>%
            footnote(
                general  = notes,
                footnote_as_chunk = TRUE,
                fixed_small_size = TRUE,
                threeparttable = TRUE)
    } else{
        output_table <- output_table %>%
        kableExtra::kable_styling(
            latex_options = c("HOLD_position", "scale_down") %>%
            footnote(
                general  = notes,
                footnote_as_chunk = TRUE,
                fixed_small_size = TRUE,
                threeparttable = TRUE)
        )
    }
    
    # Save table to output
    if (hetero_var != "") {
        hetero_var <- paste0("_", hetero_var)
    }
     if (filter_type != "") {
        filter_type <- paste0("_", filter_type)
    }
    save_kable(
        output_table,
        file = here(
            "output/publication/supplement", paste0(dep_var, hetero_var, filter_type, ".tex")
        )
    )
    print(output_table)
    
    return(output_table)
}

# set parameters
coefnames <- c(
    "fit_adopter_kpmTRUE" = "Adopted Intervention",
    "treatment_bundledg" = "Bundled Treatment",
    "treatmenttechnical" = "Technical Arm",
    "treatmentincentive" = "Technical+ Arm"
)

formula_stubs <- c(
    "Intention-to-Treat (Separate)" = "~ treatment | blocks",
    "Intention-to-Treat (Bundled)" = "~ treatment_bundled | blocks",
    "Instrumental Variable" = "~ 1 | blocks | adopter_kpm ~ treatment"
)




```

```{r arm-specific-results}

primary_vars <- c(
    "Specific Energy Consumption (MJ/kg fired brick)\\label{tab:sec}" = "sec",
    "$\\text{CO}_{2}$ Emissions (tons)\\label{tab:co2e_annual_production}" = "co2e_annual_production",
    "$\\text{PM}_{2.5}$ Emissions (tons) \\label{tab:pm25_annual_production}" = "pm25_annual_production",
    "Class 1 Bricks (\\%) \\label{tab:pct_class_1_brick}" = "pct_class_1_brick",
    "Fuel Costs (BDT/brick) \\label{tab:fuel_spending_per_brick_kpm__ownprice}" = "fuel_spending_per_brick_kpm__ownprice", 
    "Total Fuel Costs (million BDT) \\label{tab:fuel_spending_annual_prod_kpm_bdt__ownprice}" = "fuel_spending_annual_prod_kpm_bdt__ownprice",
    "Specific Fuel Consumption (tons/100,000 bricks)\\label{tab:specific_fuel_consumption_kpm}" = "specific_fuel_consumption_kpm",
    "Annual Production (100,000 bricks) \\label{tab:annual_production_actual_lakh_bricks}" = "annual_production_actual_lakh_bricks", 
    "Circuits Completed (Total number)\\label{tab:circuits_completed}" =  "circuits_completed",
    "Value of Production Per Brick (BDT/brick) \\label{tab:revenue_per_brick_kpm__median}" =  "revenue_per_brick_kpm__median",
  
    "Coal Costs (BDT/brick) \\label{tab:coal_spending_per_brick_kpm_ownprice}" = "coal_spending_per_brick_kpm__ownprice", 
    "Total Coal Costs (million BDT) \\label{tab:coal_spending_annual_prod_kpm_bdt__ownprice}" = "coal_spending_annual_prod_kpm_bdt__ownprice",
    "Specific Coal Consumption (tons/100,000 bricks)\\label{tab:specific_coal_consumption_kpm}" = "specific_coal_consumption_kpm"
)

 
arm_specific_analysis <- lapply(
    names(primary_vars),
    dep_var_tests,
    dep_vars = primary_vars,
    formula_stubs = formula_stubs,
    groups = coefnames,
    arm_iv = TRUE,
    note = "Heteroskedasticity-robust standard errors are in parentheses. Regressions include randomization strata fixed effects. Outcome data are derived from measurements collected during kiln performance monitoring. Adoption is defined as adopting double/triple zigzag brick stacking and single fireman continuous feeding.",
    df = endline_kpm_long,
    n_digits = 2
)

vop <- c("Total Value of Production (BDT) \\label{tab:total_revenue_lakh_bdt_kpm__median_annual_production}" = "total_revenue_lakh_bdt_kpm__median_annual_production")

vop_analysis <- lapply(
    names(vop),
    dep_var_tests,
    dep_vars = vop,
    formula_stubs = formula_stubs,
    groups = coefnames,
    arm_iv = TRUE,
    note = "Heteroskedasticity-robust standard errors are in parentheses. Regressions include randomization strata fixed effects. Total value of production is calculated by applying the objective brick quality data measured during the kiln performance assessment to the annual production reported at endline. Adoption is defined as adopting double/triple zigzag brick stacking and single fireman continuous feeding.",
    df = endline_kpm_long,
    n_digits = 2
)

```

## Outcomes with Endline Data

```{r endline, eval=TRUE, results="asis"}

endline_kpm_long <-  endline_kpm_long %>% 
    mutate_at(vars(ends_with("production_share_percent")),
              as.numeric) %>%
    mutate_at(vars(ends_with("production_share_percent")),
              ~.x/100
)


endline_vars <- c(
    "Endline Class 1 Bricks (\\%)\\label{tab:class1_endline}" = "class_1_production_share_percent",
     "Endline Specific Fuel Consumption (tons/100,000 bricks)\\label{tab:sfc_endline}" = "specific_fuel_consumption_endline",
      "Endline Fuel Costs Per Brick (BDT/brick) \\label{tab:fuel_spending_per_brick_endline__ownprice}" = "fuel_spending_per_brick_endline__ownprice", 
     "Endline Total Value of Production (BDT) \\label{tab:total_revenue_lakh_bdt_endline__median}" = "total_revenue_lakh_bdt_endline__median",
     "Endline Value Per Brick (BDT)\\label{tab:revenue_per_brick_endline__median}" = "revenue_per_brick_endline__median",

    "Endline Specific Fuel Consumption (tons/100,000 bricks)\\label{tab:specific_fuel_consumption_endline}" = "specific_fuel_consumption_endline"
)



endline_analysis <- lapply(
    names(endline_vars),
    dep_var_tests,
    dep_vars = endline_vars,
    formula_stubs = formula_stubs,
    groups = coefnames,
    arm_iv = TRUE,
    note = "Heteroskedasticity-robust standard errors are in parentheses. Regressions include randomization strata fixed effects. Outcome data are derived from kiln owner self-reports at endline. Adoption is defined as adopting double/triple zigzag brick stacking and single fireman continuous feeding.",
    filter_type = "_endline",
    df = endline_kpm_long,
    n_digits = 2
)


# brick classes

class_results_itt <- feols(c(class_1_production_share_percent,
                             class_1_5_production_share_percent,
                             class_2_production_share_percent,
                             class_3_production_share_percent,
                             breakages_production_share_percent) ~ treatment_bundled | blocks,
                 vcov = "hetero",
                 data = endline_kpm_long) %>%
    lapply(tidy, conf.int = T) %>%
    bind_rows(.id = "outcome")

class_results_tot <- feols(c(class_1_production_share_percent,
                             class_1_5_production_share_percent,
                             class_2_production_share_percent,
                             class_3_production_share_percent,
                             breakages_production_share_percent)  ~ 1 | blocks | adopter_kpm ~ treatment,
                 vcov = "hetero",
                 data = endline_kpm_long) %>%
    lapply(tidy, conf.int = T) %>%
    bind_rows(.id = "outcome")


class_results <- bind_rows(class_results_itt, class_results_tot) %>%
    mutate(
        model = case_when(
            term  == "treatment_bundledg" ~ "ITT",
            T ~ "IV"
        ),
        class = str_remove(outcome, "lhs: "),
        class = factor(class, ordered = T,
                         levels = c("class_1_production_share_percent",
                                    "class_1_5_production_share_percent",
                                    "class_2_production_share_percent",
                                    "class_3_production_share_percent",
                                    "breakages_production_share_percent"),
                         labels = c("Class 1",
                                    "Class 1.5",
                                    "Class 2",
                                    "Class 3",
                                    "Broken Bricks")
        )
    )

class_results %>%
    ggplot(aes(x = class, y = estimate, color = model)) +
        geom_point(position = position_dodge(0.3), size = 1.5) +
        geom_errorbar(aes(ymin = conf.low, ymax = conf.high, color = model),
                      position = position_dodge(0.3),
                      width = 0) +
        scale_x_discrete(labels = function(x) str_wrap(x, width = 14)) +
        scale_y_continuous(breaks = scales::pretty_breaks(5)) +
        geom_hline(yintercept = 0, linetype = "dashed") +
        scale_color_jama() +
        theme_minimal() +
        theme(
            panel.grid.major.x = element_blank(),
            panel.grid.minor = element_blank(),
            axis.text.x = element_text(face = "bold", size = 9),
            legend.text =  element_text(size = 8),
            legend.position = "bottom"
        ) +
        labs(
            x = NULL,
            y = "Treatment Effect",
            color = NULL
        )


ggsave(filename = here("output/supplement/endline_brick_class_results.pdf"),
       width = 6, height = 4)
```


## CO/CO2 Ratio supplementary analysis

```{r supp-co-co2 analysis}
co_co2_vars <- c(
    "Mean CO/$\\text{CO}_{2}$ Ratio\\label{tab:mean_co_co2}" = "average_co_co2_ratio_calculated",
    "Max CO/$\\text{CO}_{2}$ Ratio\\label{tab:max_co_co2}" = "max_co_co2_ratio_calculated",
    "SD CO/$\\text{CO}_{2}$ Ratio\\label{tab:sd_co_co2}" = "sd_co_co2_ratio_calculated",
    "IQR CO/$\\text{CO}_{2}$ Ratio\\label{tab:iqr_co_co2}" = "iqr_co_co2_ratio_calculated"
)


# Additional outcomes: full sample
co_co2_all <- lapply(
    names(co_co2_vars),
    dep_var_tests,
    dep_vars = co_co2_vars,
    formula_stubs = formula_stubs, 
    groups = coefnames,
    note = "Heteroskedasticity-robust standard errors are in parentheses. Regressions include randomization strata fixed effects. Outcome data are derived from measurements collected during kiln performance monitoring. Adoption is defined as adopting double/triple zigzag brick stacking and single fireman continuous feeding.",
    df = endline_kpm_long,
    n_digits = 4
)



# All outcomes: dropping kilns with \% feeding time < 33\%

co_co2_vars <- c(
    "Mean CO/$\\text{CO}_{2}$Ratio (dropping abnormal feeding)\\label{tab:mean_co_co2_feedingDrop}" = "average_co_co2_ratio_calculated",
    "Max CO/$\\text{CO}_{2}$ Ratio (dropping abnormal feeding)\\label{tab:max_co_co2_feedingDrop}" = "max_co_co2_ratio_calculated",
    "SD CO/$\\text{CO}_{2}$ Ratio (dropping abnormal feeding)\\label{tab:sd_co_co2_feedingDrop}" = "sd_co_co2_ratio_calculated",
    "IQR CO/$\\text{CO}_{2}$ Ratio (dropping abnormal feeding)\\label{tab:iqr_co_co2_feedingDrop}" = "iqr_co_co2_ratio_calculated"
)

co_co2_pctfeeding_restrict <- lapply(
    names(co_co2_vars), 
    dep_var_tests, 
    dep_vars = co_co2_vars,
    formula_stubs = formula_stubs, 
    groups = coefnames,
    note = "Heteroskedasticity-robust standard errors are in parentheses. Regressions include randomization strata fixed effects. Outcome data are derived from measurements collected during kiln performance monitoring. Adoption is defined as adopting double/triple zigzag brick stacking and single fireman continuous feeding. Sample excludes kilns with total feeding time below 33\\%, which indicates abnormal operation.",
    df = endline_kpm_long[!endline_kpm_long$pct_time_feeding_below_33, ],
    n_digits = 4,
    filter_type = "_feedingDROP"
)

# All outcomes: keeping only kilns where the $\mathrm{O}_2\%$,  $\mathrm{CO}_2\%$, and  $\mathrm{CO}$ were within normal ranges for more than 50\% of the time
co_co2_vars <- c(
    "Mean CO/$\\text{CO}_{2}$ Ratio (dropping abnormal values)\\label{tab:mean_co_co2_abnormalDrop}" = "average_co_co2_ratio_calculated",
    "Max CO/$\\text{CO}_{2}$ Ratio (dropping abnormal values)\\label{tab:max_co_co2_abnormalDrop}" = "max_co_co2_ratio_calculated",
    "SD CO/$\\text{CO}_{2}$ Ratio (dropping abnormal values)\\label{tab:sd_co_co2_abnormalDrop}" = "sd_co_co2_ratio_calculated",
    "IQR CO/$\\text{CO}_{2}$ Ratio (dropping abnormal values)\\label{tab:iqr_co_co2_abnormalDrop}" = "iqr_co_co2_ratio_calculated"
)


abnormal_range_restrict <- lapply(
    names(co_co2_vars), 
    dep_var_tests, 
    dep_vars = co_co2_vars,
    formula_stubs = formula_stubs,
    groups = coefnames,
    note = "Heteroskedasticity-robust standard errors are in parentheses. Regressions include randomization strata fixed effects. Outcome data are derived from measurements collected during kiln performance monitoring. Adoption is defined as adopting double/triple zigzag brick stacking and single fireman continuous feeding. Sample excludes kilns with O2, $\text{CO}_{2}$, and CO outside normal ranges for more than 50\\\\% of the monitored time, which indicates abnormal operation.",
    df = endline_kpm_long[!endline_kpm_long$over_50_out_of_range, ],
    n_digits = 4,
    filter_type = "_abnormalDROP"
)

# All outcomes: dropping based on feeding time & normal ranges
co_co2_vars <- c(
    "Mean CO/$\\text{CO}_{2}$ Ratio (dropping abnormal feeding \\& values)\\label{tab:mean_co_co2_abnormal_feedingDrop}" = "average_co_co2_ratio_calculated",
    "Max CO/$\\text{CO}_{2}$ Ratio (dropping abnormal feeding \\& values)\\label{tab:max_co_co2_abnormal_feedingDrop}" = "max_co_co2_ratio_calculated",
    "SD CO/$\\text{CO}_{2}$ Ratio (dropping abnormal feeding \\& values)\\label{tab:sd_co_co2_abnormal_feedingDrop}" = "sd_co_co2_ratio_calculated",
    "IQR CO/$\\text{CO}_{2}$ Ratio (dropping abnormal feeding \\& values)\\label{tab:iqr_co_co2_abnormal_feedingDrop}" = "iqr_co_co2_ratio_calculated"
)

both_restrict <- lapply(
    names(co_co2_vars), 
    dep_var_tests,
    dep_vars = co_co2_vars,
    formula_stubs = formula_stubs, 
    groups = coefnames,
    note = "Heteroskedasticity-robust standard errors are in parentheses. Regressions include randomization strata fixed effects. Outcome data are derived from measurements collected during kiln performance monitoring. Adoption is defined as adopting double/triple zigzag brick stacking and single fireman continuous feeding. Sample excludes kilns with O2, CO2, CO outside normal ranges for more than 50% of the monitored time and with total feeding time below 33%, which indicates abnormal operation.",
    df = endline_kpm_long[!endline_kpm_long$over_50_and_below_33, ],
    n_digits = 4,
    filter_type = "_abnormal_feedingDROP"
)

```

## Alternative VOP & Fuel Spending

```{r alt-vop-fuel, results="asis"}

alt_vop_fuel_vars <- c(
     "Total Value of Production (lakh BDT)\\label{tab:total_vop_kpm}" = "total_revenue_lakh_bdt_kpm__median",
     "Endline Total Value of Production (lakh BDT)" = "total_revenue_lakh_bdt_endline__median",
     "Total Fuel Spending (lakh BDT)\\label{tab:total_fuel_spending_kpm}" = "total_fuel_spending_lakh_bdt_kpm__medprice",
     "Endline Total Fuel Spending (lakh BDT)\\label{tab:total_fuel_spending_endline}" = "total_fuel_spending_lakh_bdt_endline__medprice",
    "Coal Spending Per Brick (BDT/100,000 bricks)\\label{tab:coal_spending_per_brick_kpm}" = "coal_spending_per_brick_kpm__medprice",
     "Total Coal Spending (lakh BDT)\\label{tab:total_coal_spending_kpm}" = "total_coal_spending_lakh_bdt_kpm__medprice",
  "Number of Bricks Counted During KPM \\label{tab:total_number_bricks_kpm}" = "total_number_bricks",
     "Total Bricks Fired During KPM \\label{tab:total_bricks_fired_kpm}" = "total_bricks_fired"
)

alt_vop_fuel_analysis <- lapply(
    names(alt_vop_fuel_vars),
    dep_var_tests,
    dep_vars = alt_vop_fuel_vars,
    formula_stubs = formula_stubs,
    groups = coefnames,
    note = "Heteroskedasticity-robust standard errors are in parentheses. Regressions include randomization strata fixed effects.",
    #filter_type = "_endline",
    df = endline_kpm_long,
    n_digits = 2
)


```

## Changes in other costs

```{r fuel-costs, results="asis}

cost_vars <- c(
     "Sawdust Spending (BDT/ton)" = "sawdust_spending_bdt_kpm__medprice",
    "Soil (BDT per 1000 bricks)" = "soil_cost_bdt_per_1k_bricks",
    "Molding (BDT per 1000 bricks)" = "clay_prep_moulding_cost_bdt_per_1k_bricks",
    "Coal preparation (BDT per season)" = "coal_prep_cost_bdt_per_season",
    "Brick loading (BDT per 1000 bricks)" = "brick_loading_cost_bdt_per_1k_bricks",
    "Firemen cost (BDT per season)" = "firemen_cost_bdt_per_season",
    "Brick unloading (BDT per 1000 bricks)" = "brick_unloading_cost_bdt_per_1k_bricks"
)



cost_analysis <- lapply(
    names(cost_vars),
    dep_var_tests,
    dep_vars = cost_vars,
    formula_stubs = formula_stubs,
    groups = coefnames,
    note = "Heteroskedasticity-robust standard errors are in parentheses. Regressions include randomization strata fixed effects.",
    df = endline_kpm_long,
    n_digits = 2
)



```


## Heterogeneous Effects

```{r hetero_vars, results="asis"}

dat <- endline_kpm_long %>% 
    inner_join(baseline %>% select(kiln_id, kiln_owner_experience_years,
                                   higher_sec_plus, highland)) %>%
    mutate(
        treatment_bundled = as.numeric(treatment_bundled),
        treatXexp = treatment_bundled*kiln_owner_experience_years,
        treatXedu = treatment_bundled*higher_sec_plus,
        treatXhighland = treatment_bundled*highland 
    )


setFixest_fml(..vars = ~sw(kiln_owner_experience_years + treatXexp, 
                           higher_sec_plus + treatXedu,
                           highland + treatXhighland))

heterogenous_mods <- feols(c(sec, pct_class_1_brick) ~ treatment_bundled + ..vars | blocks,
      vcov = "hetero",
      data = dat)



names(heterogenous_mods) <- c("Experience", "Education",
                        "Location", "Experience", "Education",
                        "Location")

gm <- list(
        list("raw" = "nobs", "clean" = "N", "fmt" = "%.0f"))
    
het_coefs <-  c( 
    "treatment_bundled" = "Bundled Treatment",
    "kiln_owner_experience_years" = "Owner Experience",
    "treatXexp" = "Treatment X Owner Experience",
    "higher_sec_plus" = "Higher Secondary+",
    "treatXedu" = "Treatment X Higher Secondary+",
    "highland" = "Highland",
    "treatXhighland" = "Treatment X Highland"
)


modelsummary(heterogenous_mods,
             fmt = fmt_sprintf(paste0("%.2f")),
             gof_map = gm, statistic = "std.error",
             stars = TRUE,
             coef_map = het_coefs,
             label = knitr::opts_current$get('label') %>%
                    str_replace_all("_", "-"),
              title = "Heterogeneous Treatment Effects\\label{tab:heterogeneous}",
                escape = TRUE, 
                output = "latex"
             ) %>%
    kableExtra::kable_styling(
        font_size = 11,
        latex_options = c("HOLD_position")) %>%
    column_spec(column = 1, width = "1.1in") %>%
    column_spec(column = 2:6, width = "0.8in") %>%
    add_header_above(c(" ", "Specific Energy Consumption" = 3, "Class 1 Bricks (%)" = 3)) %>%
    footnote(
        general = "Heteroskedasticity-robust standard errors are in parentheses. Regression includes randomization strata fixed effects. Kiln characteristics (owner experience, education, and kiln location) are from baseline data and outcomes are derived from the kiln performance monitoring. Kiln owner experience is measured in years, higher secondary+ is a binary indicator that equals 1 if the owner has attained higher secondary schooling or more, and  highland indicates a kiln is located on highland (as opposed to lowland).",
        threeparttable = TRUE, fixed_small_size = TRUE,
        escape = FALSE, footnote_as_chunk = TRUE) %>%
    save_kable(
        file = here("output/supplement/heterogeneous_effects.tex")
    )

```
